% close all
tic;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%  IN THIS SECTION OF THE CODE, WE COMPUTE THE PRODUCTIVE
%%%%%%%%%%%%%%%%  EFFICIENCY TYPE, thetaE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
load('g2.mat')
load('ProductionData.mat')



ProductionData.contract = zeros(size(ProductionData.k)) ;
studlist = unique(ProductionData.sid); 
ProductionDataNonCum = ProductionData ; 
for ii=1:length(studlist)
    indx = find(ProductionDataNonCum.sid==studlist(ii)) ; 
    temp = ProductionDataNonCum.t(indx) ; 
    temp = [temp(1); (temp(2:end)-temp(1:end-1))] ; 
    ProductionDataNonCum.t(indx) = temp ; 
    ProductionData.contract(ProductionData.sid==studlist(ii)) = g2.contract1(g2.student_id==studlist(ii))...
                                                                + 2*g2.contract2(g2.student_id==studlist(ii)) + 3*g2.contract3(g2.student_id==studlist(ii)) ;
    ProductionDataNonCum.contract(ProductionDataNonCum.sid==studlist(ii)) = ProductionData.contract(ProductionData.sid==studlist(ii)) ;
end

%%
%%%%The following block of code looks for outliers and replaces them with the within-person mean
%%%%worktime.
lowQ_id         = [] ;
lowQ_time       = [] ;
lowQ_prescore   = [] ;
lowQ_contract   = [] ;
tempid = unique(ProductionDataNonCum.sid) ;
CUTOFF = mean(ProductionDataNonCum.t)+5*std(ProductionDataNonCum.t) ;   %%%%This will be used to flag extreme outliers more than 5 standard deviations above the unconditional mean work time
for ii=1:length(tempid) 
    temp = sort(ProductionDataNonCum.t(ProductionDataNonCum.sid==tempid(ii))); 
    if length(temp)>0 && length(temp)<4 %#ok  %%%%In this case, the within-student sample is small, 
                                              %%%%so we pool them together and compute a conditional 
                                              %%%%within-child mean using cross-child data and 
                                              %%%%sample weights.  The following object that will be 
                                              %%%%useful for this task at the end... 
        lowQ_id   = [lowQ_id; tempid(ii)] ; %#ok 
        lowQ_time = [lowQ_time; temp] ; %#ok 
        lowQ_contract = [lowQ_contract; 1*g2.contract1(g2.student_id==tempid(ii)) + 2*g2.contract2(g2.student_id==tempid(ii)) + 3*g2.contract3(g2.student_id==tempid(ii))] ; %#ok
        lowQ_prescore = [lowQ_prescore; g2.prescore(g2.student_id==tempid(ii))] ; %#ok 
    elseif length(temp)>=4 
        btemp=2.345*std(temp)*(length(temp)^-0.2); %%%%This is the optimal bandwidth for a kernel smoothed density estimator based on an Epanechnikov kernel.
        %%%%If two consecutive datapoints have more distance between them than this, then the kernel density will
        %%%%attain a value of zero somewhere between.  We use a full-support criterion to identify outlier observations.
        tempindx = find(temp(2:end)-temp(1:end-1)>2*btemp);
        if ~isempty(tempindx)
            cutoff = temp(min(tempindx)) ; %this is the last index to keep from temp
            temp2 = temp(temp<=cutoff);
            btemp2=2.345*std(temp2)*(length(temp2)^-0.2); %std(temp2); %
            tempindx2 = find(temp2(2:end)-temp2(1:end-1)>2*btemp2);
            stopflag = isempty(tempindx2) ;
            while stopflag == 0
                cutoff = temp2(min(tempindx2)) ; %this is the last index to keep from temp
                temp2   = temp(temp<=cutoff) ;
                btemp2=2.345*std(temp2)*(length(temp2)^-0.2); %std(temp2); %
                tempindx2 = find(temp2(2:end)-temp2(1:end-1)>2*btemp2);
                stopflag = isempty(tempindx2) ;
                btemp = btemp2 ;
            end
            cutoff = min(cutoff,CUTOFF) ;  %%%%This line imposes another sanity check: we flag any production times that are more than 5 standard deviations above the unconditional mean, which is what CUTTOFF is.
            withintruncmean = mean(ProductionDataNonCum.t(ProductionDataNonCum.sid==tempid(ii) & ProductionDataNonCum.t<=cutoff)) ;
            ProductionDataNonCum.t(ProductionDataNonCum.sid==tempid(ii) & ProductionDataNonCum.t>cutoff) = withintruncmean ;
            if unfinished.time(unfinished.sid==tempid(ii))>cutoff
                unfinished.time(unfinished.sid==tempid(ii)) = withintruncmean ;
            end
        else  %%%%Even if there are no outliers in the sample of regular work times, the unfinished time may still be an outlier
            cutoff = max(temp) + 2*btemp ; 
            cutoff = min(cutoff,CUTOFF) ;
            withintruncmean = mean(ProductionDataNonCum.t(ProductionDataNonCum.sid==tempid(ii) & ProductionDataNonCum.t<=cutoff)) ;
            ProductionDataNonCum.t(ProductionDataNonCum.sid==tempid(ii) & ProductionDataNonCum.t>cutoff) = withintruncmean ;
            if unfinished.time(unfinished.sid==tempid(ii))>cutoff
                unfinished.time(unfinished.sid==tempid(ii)) = withintruncmean ;
            end
        end
    end
end

%%%%This next loop will help us to tackle outliers within the set of students with fewer than 4
%%%%observations:
lowQ_time = sort(lowQ_time) ;
btemp     = 2.345*std(lowQ_time)*(length(lowQ_time)^-0.2) ;
tempindx  = find(lowQ_time(2:end)-lowQ_time(1:end-1)>2*btemp) ;
while ~isempty(tempindx)
    temp2    = lowQ_time(1:min(tempindx)) ;
    btemp    = 2.345*std(temp2)*(length(temp2)^-0.2) ;
    tempindx = find(temp2(2:end)-temp2(1:end-1)>2*btemp) ;
end
temp = lowQ_time ; temp(tempindx+1) = [] ;
cutoff = min( max(temp)+2*btemp,CUTOFF ) ;
btemp      = std(g2.prescore) ;  
for ii=1:length(lowQ_id)  %here we compute sampling weights for each of ii's same-contract classmates
    iiscore    = lowQ_prescore(ii) ;
    iicontract = lowQ_contract(ii) ;
    compgroup  = lowQ_id(lowQ_contract==iicontract & lowQ_prescore>=iiscore-btemp & lowQ_prescore<=iiscore+btemp ) ;
    tempsamp   = [] ; 
    for jj=1:length(compgroup)
        tempsamp = [tempsamp; ProductionDataNonCum.t(ProductionDataNonCum.sid==compgroup(jj) & ProductionDataNonCum.t<cutoff)] ; %#ok
    end
    withintruncmean = mean(tempsamp) ;  %%%%Given the above few lines, this is the mean of work times within a child's classmates who are of the same contract group and had a pre-score within one standard deviation of the child's own pre-score
    ProductionDataNonCum.t(ProductionDataNonCum.sid==lowQ_id(ii) & ProductionDataNonCum.t>cutoff) = withintruncmean ;
    if unfinished.time(unfinished.sid==lowQ_id(ii))>cutoff
        unfinished.time(unfinished.sid==lowQ_id(ii)) = withintruncmean ;
    end
end









%%
EPointEst = ThetaEEstimator(ProductionDataNonCum,1) ; 
%%
NijmanVerbeek = NVtest(ProductionDataNonCum) 








